{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "1. 获取转移矩阵\n",
    "2. 随机游走"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-09-26T11:31:07.642666Z",
     "start_time": "2020-09-26T11:31:06.830807Z"
    }
   },
   "outputs": [],
   "source": [
    "from scipy.sparse import coo_matrix\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "\n",
    "# 数据处理示例\n",
    "data_path=\"../data/ml-100k/u.data\"\n",
    "data_fields = ['user_id', 'item_id', 'rating', 'timestamp']\n",
    "data_df = pd.read_table(data_path, names=data_fields)\n",
    "\n",
    "# get user number\n",
    "n_users = max(data_df['user_id'].values)\n",
    "# get item number\n",
    "n_items = max(data_df['item_id'].values)\n",
    "\n",
    "\n",
    "data = np.ones((data_df.shape[0]))\n",
    "data=data_df.rating.values\n",
    "row = data_df.user_id-1\n",
    "col = data_df.item_id-1\n",
    "UI = coo_matrix((data, (row, col)), shape=(n_users, n_items))\n",
    "UIUI = UI.dot(UI.transpose()).dot(UI)\n",
    "\n",
    "# 将UI转换成邻接矩阵\n",
    "UI=UI.toarray()\n",
    "UI=(UI>0).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# X: 输入的转移矩阵，如UI矩阵\n",
    "# alpha: 重启系数\n",
    "def randomWalk(X, alpha=0.9, num_walk=2):\n",
    "    num_s,num_e=X.shape\n",
    "    X_norm=X.sum(1,keepdims=True)\n",
    "    \n",
    "    # 初始化所有节点的状态\n",
    "    Z=np.zeros((num_s,num_s+num_e))\n",
    "    for i in range(num_s):\n",
    "        Z[i,i]=1\n",
    "    Z_0=Z.copy()\n",
    "    \n",
    "    # 初始化转移矩阵P，P是(num_s+num_e)*(num_s+num_e)的矩阵\n",
    "    P=np.concatenate((np.zeros((num_s,num_s)),X),axis=1)\n",
    "    P=np.concatenate((P,np.zeros((num_e,P.shape[1]))),axis=0)\n",
    "    P_norm=P.sum(1,keepdims=True) # 归一化，等概率随机游走\n",
    "    P=P/P_norm\n",
    "    for i in range(P.shape[0]): # 对于全零行进行特殊处理\n",
    "        if np.isnan(P[i][0]):\n",
    "            P[i,:num_s]=1/num_s\n",
    "            P[i,num_s:]=1/num_e\n",
    "            \n",
    "    Z=np.mat(Z)\n",
    "    P=np.mat(P)\n",
    "    for i in range(num_walk):\n",
    "        Z=alpha*Z*P+(1-alpha)*Z_0\n",
    "    return Z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:17: RuntimeWarning: invalid value encountered in true_divide\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "matrix([[0.10085896, 0.00085896, 0.00085896, ..., 0.00048157, 0.00048157,\n",
       "         0.00048157],\n",
       "        [0.00085896, 0.10085896, 0.00085896, ..., 0.00048157, 0.00048157,\n",
       "         0.00048157],\n",
       "        [0.00085896, 0.00085896, 0.10085896, ..., 0.00048157, 0.00048157,\n",
       "         0.00048157],\n",
       "        ...,\n",
       "        [0.00085896, 0.00085896, 0.00085896, ..., 0.00048157, 0.00048157,\n",
       "         0.00048157],\n",
       "        [0.00085896, 0.00085896, 0.00085896, ..., 0.00048157, 0.00048157,\n",
       "         0.00048157],\n",
       "        [0.00085896, 0.00085896, 0.00085896, ..., 0.00048157, 0.00048157,\n",
       "         0.00048157]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res=randomWalk(UI, alpha=0.9, num_walk=2)\n",
    "res"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
